
* Run the project environment file first
* (presumably defines the path globals used below, e.g. $basedata, $temp, $auxdata -- confirm there)
do "E:/ReplicateBuild/02_code/00_environment/00_set_environment.do"

* Table of contents: a stage runs only when its switch equals 1
local basedata = 1
local exporter = 1

* Switches for the four sub-stages inside the exporter program
foreach stage in choicesExporter interviewsExporter teachVarExporter schoolVarExporter {
	global `stage' = 1
}

* Buffers subtracted from / added to each applicant's application window
* when the choice set is determined
foreach side in pre post {
	global buffer`side' = 0
}


if `basedata' == 1 {

***********************************************
* prepare several datasets for future merging
***********************************************

* list of teacher-jobs that generated applications:
* one row per (app_year, applicant_id, job_id) with outcome flags and the earliest application date

use "$basedata/FOCAL_applications_data", clear

* every application record must carry a date
assert applied_date!=.

local outcome application_outcome_category

gen applied = 1

* an interview is implied by the later outcomes "hired" and "offer_declined"
gen interview = inlist(`outcome',"hired","offer_declined","interview")
tab interview

* flags that correspond to exactly one outcome category
foreach status in hired offer_declined withdrew {
	gen `status' = (`outcome'=="`status'")
}

* a positive assessment is implied by reaching the interview stage
gen positive_assessment = interview | (`outcome'=="positive_assessment")
gen negative_assessment = inlist(`outcome',"negative_assessment","negative_later_assessment")

* disregard applications dated after August 31 of the application year
drop if applied_date>mdy(8,31,app_year)

collapse (sum) applied interview hired offer_declined withdrew positive_assessment negative_assessment ///
	(min) applied_date ///
	(first) application_outcome_category, ///
	by(app_year applicant_id job_id)

* one application per position
assert applied==1
assert inlist(interview,0,1)
drop applied

tempfile tempappliedjobs
save `tempappliedjobs', replace

* teachers matched to current school and whether an internal applicant

* Stage the three alternative value-added files as tempfiles keyed by (ncerdc_id, sy).
* Per specification: the source file and the source variables copied into the m1 and m2 columns.
local src_cfr "va_homogeneous_estimates_drift_classmeans_cfr"
local m1_cfr mu_homog_ma_cfr
local m2_cfr mu_homog_ma_cfr

local src_homog "va_homogeneous_estimates_drift"
local m1_homog mu_jt_hat_preY_ma
local m2_homog mu_jt_hat_preY_ma

local src_sM "va_estimates_drift_schoolMeans"
local m1_sM mu_jt_m1_hat_preY_ma
local m2_sM mu_jt_m2_hat_preY_ma

foreach spec in cfr homog sM {

	use "$basedata/`src_`spec''", clear

	* j and t are copied into the merge keys used further down
	gen ncerdc_id = j
	gen sy = t

	gen mu_jt_m1_hat_preY_ma_`spec' = `m1_`spec''
	gen mu_jt_m2_hat_preY_ma_`spec' = `m2_`spec''

	* average within teacher-year so the keys are unique
	collapse (mean) mu_jt_m1_hat_preY_ma_`spec' mu_jt_m2_hat_preY_ma_`spec', by(ncerdc_id sy)

	assert ncerdc_id!=.

	* defines the locals tempcfr, temphomog and tempsM
	tempfile temp`spec'
	save "`temp`spec''", replace
}

use "$basedata/FOCAL_applicant_year_data", clear

* remember the incoming row order; it is restored once the merges are done
gen obsnum = _n
sort ncerdc_id sy

* attach each alternative VA estimate; rows found only in the VA file are discarded
foreach vafile in tempcfr temphomog tempsM {
	merge n:1 ncerdc_id sy using ``vafile''
	drop if _m==2
	drop _m
}

sort obsnum
drop obsnum

* first and last school year in the data, kept as locals and as globals (the globals are reused later)
qui summ sy
local y1 = r(min)
local y2 = r(max)
global y1 = `y1'
global y2 = `y2'

* one column per school year for school, district and each VA measure
forvalues yy=`y1'/`y2' {
	gen school`yy' = ncerdc_schlcode if sy==`yy'
	gen lea`yy' = ncerdc_lea if sy==`yy'

	forvalues m=1/2 {
		gen va_m`m'_ma_`yy' = (mu_jt_m`m'_hat_career_ma) if sy==`yy'
	}

	foreach suff2 in "" "_race" "_ach" "_cfr" "_homog" "_sM" {
		forvalues m=1/2 {
			gen va_m`m'_ma_`yy'_preY`suff2' = (mu_jt_m`m'_hat_preY_ma`suff2') if sy==`yy'
		}
	}
}

* for each applicant ID, keep Accountability ID, whether internal, school for each year, location
local idvars applicant_id FOCALEmployeeID ncerdc_id zip5 teacher_app_latitude teacher_app_longitude internal

keep `idvars' school2* lea2* va_m*_2*

* each per-year column is non-missing only in its own year, so min() picks that value
collapse (min) school2* lea2* va_m*, by(`idvars')

isid applicant_id

tempfile temppastschools
save `temppastschools', replace


***********************************************
* construct teacher application window
***********************************************

* Per applicant and application year: first and last application date over all jobs,
* plus the first application date among school-based jobs with base_sample==1 or tlp!=0.
use "$basedata/FOCAL_applications_data", clear

drop if applied_date>mdy(8,31,app_year)

gen applied_date_restricted = applied_date if (base_sample==1 | tlp!=0) & job_at_school==1

collapse (min) first_person_app=applied_date first_person_app_restricted=applied_date_restricted ///
	(max) last_person_app=applied_date, by(app_year applicant_id)

* round down to whole days and display as daily dates
foreach datevar of varlist first_person_app first_person_app_restricted last_person_app {
	replace `datevar' = floor(`datevar')
}
format first_person_app first_person_app_restricted last_person_app %td

tempfile tempteacherwindow
save `tempteacherwindow', replace

***********************************************
* construct job application window
***********************************************

* School locations: one row per school code with a single fixed pair of coordinates.
use "$basedata/FOCAL_school_year_data", clear
keep ncerdc_lea ncerdc_schlcode sy school_latitude school_longitude
* XXX stands in for the hidden district code and has to be filled in before this can run
keep if ncerdc_lea==XXX // hiding district
destring school_latitude, replace
destring school_longitude, replace

preserve
	* take the coordinates from the latest school year in which both are recorded
	drop if missing(school_latitude) | missing(school_longitude)
	* sy is an explicit within-group sort key, so the last row of each school is
	* guaranteed to be its latest year rather than depending on a previous sort
	bysort ncerdc_lea ncerdc_schlcode (sy): keep if _n==_N
	keep ncerdc_lea ncerdc_schlcode school_latitude school_longitude

	tempfile temp_fixed_cords
	save `temp_fixed_cords', replace
restore

keep ncerdc_lea ncerdc_schlcode

duplicates drop
drop if missing(ncerdc_schlcode)
isid ncerdc_schlcode

* schools that never report coordinates remain in the list with missing latitude/longitude
merge m:1 ncerdc_lea ncerdc_schlcode using `temp_fixed_cords'
drop _merge

tempfile templocations
save `templocations', replace

* School-year means of every p_school_* variable, merged onto jobs below
use "$basedata/FOCAL_school_year_data", clear
collapse (mean) p_school_*, by(ncerdc_lea ncerdc_schlcode sy)

tempfile tempDISAD
save `tempDISAD', replace

* start with when applications came in for each job

use "$basedata/FOCAL_applications_data", clear

* keep generic elementary jobs (base_sample==1 or tlp!=0) that are at a school
keep if (base_sample==1 | tlp!=0) & job_at_school==1

* merge on location; every remaining application must find its school
sort ncerdc_lea ncerdc_schlcode
merge n:1 ncerdc_lea ncerdc_schlcode using `templocations'
assert _m==3 if job_at_school==1
drop if _m==2
drop _m

* merge on fraction DISAD: application year yy is matched to school year yy+1,
* and only rows present on both sides survive
gen sy = app_year+1
sort ncerdc_lea ncerdc_schlcode sy
merge n:1 ncerdc_lea ncerdc_schlcode sy using `tempDISAD'
keep if _m==3
drop _m

* reduce to one row per job and application year
keep earliest_app latest_app ncerdc_schlcode school_latitude school_longitude tlp tlp_pool tlp_job tlp_school p_school* job_school tlp_pos_type job_id app_year

duplicates drop
isid job_id app_year

* round down to whole days and display as daily dates
foreach datevar of varlist earliest_app latest_app {
	replace `datevar' = floor(`datevar')
}
format earliest_app latest_app %td

* bound at August 31
replace latest_app = min(latest_app,mdy(8,31,app_year))

* the posting is treated as open from its first to its last received application
gen PostingDate = earliest_app
gen CloseDate = latest_app

***********************************************
* determine choice set
***********************************************

** decision rule:
 * a vacancy is open over [PostingDate, CloseDate]; a person is active over [first_person_app, last_person_app]
 * a vacancy that closed before first_person_app minus the pre-buffer could not have been applied to
 * a vacancy posted after last_person_app plus the post-buffer could not have been applied to
 * (people are assumed not to be active before their first application)

* pair every job with every applicant window of the same application year
sort app_year job_id
joinby app_year using `tempteacherwindow', unmatched(both) _merge(_m)
*assert _m==3
* every job must have at least one applicant window in its year; windows without any job are dropped
assert _m!=1
drop if _m==2
drop _m

* in the choice set unless the posting closed before, or opened after, the buffered window
gen choiceset = !((CloseDate < first_person_app - $bufferpre) | (PostingDate > last_person_app + $bufferpost))

* day-1 choice set: the posting must be open on the first restricted application date
gen choicesetday1 = !((CloseDate < first_person_app_restricted) | (PostingDate > first_person_app_restricted))

* merge on the information about which jobs teachers applied to

keep app_year applicant_id job_id choiceset* ncerdc_schlcode school_latitude school_longitude PostingDate CloseDate first_person_app last_person_app job_school tlp tlp_pool tlp_job p_school* tlp_pos_type tlp_school
sort app_year applicant_id job_id
merge 1:1 app_year applicant_id job_id using `tempappliedjobs'
tab _merge

* a pair counts as applied only when it appears on both sides of the merge
gen applied = (_merge==3)
drop _merge

tab choiceset applied

* every observed application must lie inside the constructed choice set
assert choiceset==1 if applied==1

* drop teachers who don't apply to any positions in our base sample
bys applicant_id app_year: egen numapps = total(applied)
drop if numapps==0
drop numapps

* merge on teacher information to adjust for current school

sort applicant_id
merge n:1 applicant_id using `temppastschools'
drop if _merge==2
* in the accountability data when an employee ID is present
gen in_accountability = FOCALEmployeeID!=.
drop _merge

* make internal constant within applicant-year (its maximum), with missing treated as 0
bys applicant_id app_year: egen internal_max = max(internal)
drop internal
ren internal_max internal

tab internal, m
replace internal = 0 if internal==.


* count current school as being in choice set and applied==0
* The indicators created here are filled in application year by application year in the loop below.

gen already_teaching = 0
gen current_school = 0
gen current_lea = 0
gen has_va_ma = 0
gen va_ma = .

* pre-year VA under each alternative specification
* note: the has_va_ma_preY* flags start as missing (not 0), so rows whose app_year
* lies outside the loop range below keep a missing flag
foreach suff2 in "" "_race" "_ach" "_cfr" "_homog" "_sM" {
	gen va_ma_preY`suff2' = .
	gen has_va_ma_preY`suff2' = .
}

gen next_school = 0
gen num_years_at_school = 0 // count number of years at next school

* the _cfr/_homog/_sM specifications reuse the baseline p_school_m1_ma / p_school_m2_ma shares;
* capture suppresses any error from gen (for example when the suffixed variable is already present)
foreach suff3 in "_cfr" "_homog" "_sM" {
	cap gen p_school_m1_ma`suff3' = p_school_m1_ma
	cap gen p_school_m2_ma`suff3' = p_school_m2_ma
}


* loop range over application years; the upper end is capped at 2018
* NOTE(review): the reason for the 2018 cap is not visible in this file -- confirm
qui summ app_year
local y1 = r(min)
local y2 = min(2018,r(max))

forvalues yy=`y1'/`y2' {
	local yyplus1 = `yy'+1
	* status in the application year: teaching at all, at this job's school, in the hidden district
	replace already_teaching = 1 if app_year==`yy' & school`yy'!=.
	replace current_school = 1 if app_year==`yy' & school`yy'==ncerdc_schlcode
	replace current_lea = 1 if app_year==`yy' & lea`yy'==XXX // hide identity
	* VA taken from the yy+1 school-year columns, combined as m1*p_school_m1_ma + m2*p_school_m2_ma;
	* capture turns this into a silent no-op whenever the command errors (e.g. no yy+1 columns)
	cap replace va_ma = va_m1_ma_`yyplus1'*p_school_m1_ma + va_m2_ma_`yyplus1'*p_school_m2_ma if app_year==`yy'
	replace has_va_ma = (va_ma!=.) if app_year==`yy'
	
	* same construction for every pre-year specification
	foreach suff2 in "" "_race" "_ach" "_cfr" "_homog" "_sM" {
	cap replace va_ma_preY`suff2' = va_m1_ma_`yyplus1'_preY`suff2'*p_school_m1_ma`suff2' + va_m2_ma_`yyplus1'_preY`suff2'*p_school_m2_ma`suff2' if app_year==`yy'
	replace has_va_ma_preY`suff2' = (va_ma_preY`suff2'!=.) if app_year==`yy'
	}
	
	* next_school is only set for application years before the last loop year
	if `yy'<`y2' {
	replace next_school = 1 if app_year==`yy' & school`yyplus1'==ncerdc_schlcode
	}
	* count the school years after yy (within the global y1..y2 range) spent at this job's school
	forvalues zz=${y1}/${y2} {
	replace num_years_at_school = num_years_at_school+1 if app_year==`yy' & `zz'>`yy' & school`zz'==ncerdc_schlcode
	
	}
}
* upper bound for num_years_at_school: school years between app_year and the global y2
gen max_years_at_school = max(0,${y2}-app_year)
* missing VA is recoded to 0; the has_* flags record which values are real
replace va_ma = 0 if has_va_ma==0

	foreach suff2 in "" "_race" "_ach" "_cfr" "_homog" "_sM" {
	  replace va_ma_preY`suff2' = 0 if has_va_ma_preY`suff2'==0  
	}


* constant column that is exported later
* NOTE(review): looks like a placeholder -- confirm it is meant to stay at 0
gen va_rd_preY = 0

* the per-year helper columns are no longer needed
drop va_m1* va_m2* p_school*

* the pattern assumes every school year is of the form 20xx
drop lea20* school20*

* the current school is always part of the choice set
replace choiceset = 1 if current_school==1
replace choicesetday1 = 1 if current_school==1

* calculate distance
* geodist is a user-written command; dist is only computed where all four coordinates are present
* (distance units follow the geodist default -- presumably kilometres, confirm)
local coords_ok school_latitude!=. & school_longitude!=. & teacher_app_latitude!=. & teacher_app_longitude!=.
geodist school_latitude school_longitude teacher_app_latitude teacher_app_longitude if `coords_ok', gen(dist)
replace dist = round(dist,.01)

* round the school coordinates to 5 decimals before they serve as merge keys
foreach coord in school_latitude school_longitude {
	replace `coord' = round(`coord',.00001)
}

* attach commute time and distance by applicant zip code and school coordinates
sort zip5 school_latitude school_longitude
merge n:1 zip5 school_latitude school_longitude using "$auxdata/FOCAL_commute_times", keepusing(commute_time commute_distance)
drop if _merge==2
drop _merge

* rescale: time by 1/60 and distance by 1/1000
* (presumably seconds to minutes and metres to kilometres -- confirm against the commute file)
replace commute_time = commute_time/60
replace commute_distance = commute_distance/1000


*drop school_latitude school_longitude teacher_app_latitude teacher_app_longitude zip5


* rows without a school code cannot be indexed below
drop if ncerdc_schlcode==.

* create indices

egen teacher_index = group(FOCALEmployeeID)
egen applicant_index = group(applicant_id)

* combined index: the teacher index where it exists, otherwise the applicant index
* shifted past the largest teacher index so the two ranges cannot collide
qui summ teacher_index
local nt = r(max)
gen teacher_applicant_var = cond(teacher_index==., `nt'+applicant_index, teacher_index)

egen full_teacher_index = group(teacher_applicant_var)
drop teacher_applicant_var

* create index for all teacher IDs combinations
* missing ids are replaced by the sentinel -100 so that group() does not discard those rows
foreach id in teacher_index applicant_index ncerdc_id {
	gen `id'_nonmiss = `id'
	replace `id'_nonmiss = -100 if `id'_nonmiss==.
}

egen full_teacher_all_index = group(teacher_index_nonmiss applicant_index_nonmiss ncerdc_id_nonmiss)
drop *_nonmiss

* crosswalks between the original ids and the indices, one row per applicant
local crosswalk FOCALEmployeeID applicant_id teacher_index applicant_index full_teacher_index

preserve
	keep `crosswalk'
	duplicates drop
	isid applicant_index
	sort FOCALEmployeeID applicant_id
	save "$basedata/estimationdata/full_teacher_index", replace
restore

preserve
	keep `crosswalk' full_teacher_all_index
	duplicates drop
	isid applicant_index
	sort FOCALEmployeeID applicant_id
	save "$basedata/estimationdata/full_teacher_all_index", replace
restore

egen school_index = group(ncerdc_schlcode)
egen job_index = group(job_id)

* crosswalk between school/job ids and their indices, one row per job
preserve
	keep ncerdc_schlcode job_id school_index job_index
	duplicates drop
	isid job_index
	sort ncerdc_schlcode job_id
	save "$basedata/estimationdata/full_job_index", replace
restore

* full applicant-by-job dataset, read back by the exporter program
save "$temp/choicesetdata_buff_${bufferpre}_${bufferpost}", replace

}

if `exporter' == 1 {



* exporter: writes the estimation datasets (.dta and .csv) for one sample definition.
* Positional arguments, as they are used in the program body:
*   argument 1  sample restriction: internalonly, internalaccount, internalaccountVA,
*               internalaccountteaching or internalaccountteachingVA; any other value applies none
*   argument 2  row unit: applicant or teacher
*   argument 3  column unit: job or school
*   argument 4  output subfolder and file-name prefix under $basedata/estimationdata
*   argument 5  file-name suffix; when it is empty, rows with tlp==1 are dropped
* Which stages run is controlled by the globals choicesExporter, interviewsExporter,
* teachVarExporter and schoolVarExporter.
cap program drop exporter
program exporter

if ${choicesExporter} == 1 {

* Stage 1: row-unit by column-unit choice data, saved as .dta and exported as .csv

* define variables for aggregation
* argument 2 selects the row index, argument 3 the column index
local ivar ""
local jvar ""
if "`2'" == "applicant" {
	local ivar = "applicant_index"
}
if "`2'" == "teacher" {
	local ivar = "full_teacher_all_index"
}
if "`3'" == "job" {
	local jvar = "job_index"
}
if "`3'" == "school" {
	local jvar = "school_index"
}

* stop right away on an unrecognised unit; an empty macro would otherwise only
* surface as an error after the collapse, when the group indices are built
if "`ivar'" == "" | "`jvar'" == "" {
	display as error "exporter: argument 2 must be applicant or teacher, argument 3 must be job or school"
	exit 198
}

use "$temp/choicesetdata_buff_${bufferpre}_${bufferpost}", clear

* keep only application years in which at least one row has has_va_ma==1
bys app_year: egen any_va_ma = max(has_va_ma)
keep if any_va_ma==1
drop any_va_ma

* outcome flags are missing where no application record was merged on; count those as 0
foreach var in "interview" "hired" "offer_declined" "withdrew" "positive_assessment" "negative_assessment" {
	replace `var' = 0 if `var'==.
}


* aggregate to the chosen row and column units within application year
collapse (max) choiceset* applied interview internal already_teaching current_school current_lea next_school in_accountability has_va_ma* num_years_at_school max_years_at_school ///
	last_person_app CloseDate (min) dist commute_time commute_distance va_ma va_ma_preY* va_rd_preY* ///
	hired offer_declined withdrew positive_assessment negative_assessment ///
	first_person_app PostingDate applied_date job_id tlp tlp_pool tlp_job tlp_school (first) tlp_pos_type application_outcome_category, by(app_year `ivar' `jvar' applicant_id FOCALEmployeeID ncerdc_id ncerdc_schlcode full_teacher_index school_index)

* flag row units that have a missing distance to any alternative in that year
gen distmiss = (dist==.)
bys app_year `ivar': egen distevermiss = max(distmiss)

* optional sample restriction chosen by argument 1; any other value keeps everyone
if "`1'" == "internalonly" {
	keep if internal==1
}
if "`1'" == "internalaccount" {
	keep if internal==1 & distevermiss==0 & in_accountability==1
}
if "`1'" == "internalaccountVA" {
	keep if internal==1 & distevermiss==0 & in_accountability==1 & has_va_ma_preY==1
}
if "`1'" == "internalaccountteaching" {
	keep if internal==1 & distevermiss==0 & in_accountability==1 & already_teaching==1
}
if "`1'" == "internalaccountteachingVA" {
	keep if internal==1 & distevermiss==0 & in_accountability==1 & already_teaching==1 & has_va_ma_preY==1
}

* without a file suffix (argument 5) the tlp==1 rows are excluded
if "`5'" == "" {
	drop if tlp==1
}

* only keep teachers/applicants and schools/positions that have at least one application
bys app_year `ivar': egen numappsi = sum(applied)
bys app_year `jvar': egen numappsj = sum(applied)
keep if numappsi > 0 & numappsj > 0
drop numappsi numappsj

sort app_year `ivar' `jvar'

* consecutive indices: year-specific (itidx, jtidx) and pooled over years (iidx, jidx)
egen itidx = group(app_year `ivar')
egen jtidx = group(app_year `jvar')
egen iidx = group(`ivar')
egen jidx = group(`jvar')

egen teachidx = group(full_teacher_index)
egen schoolidx = group(school_index)


* create year-specific index
* (itidx and jtidx renumbered so that each application year starts again at 1)
bys app_year: egen mini = min(itidx)
bys app_year: egen minj = min(jtidx)

gen yitidx = itidx-mini+1
gen yjtidx = jtidx-minj+1
drop mini minj

sort itidx jtidx

keep itidx jtidx yitidx yjtidx applied interview choiceset iidx jidx dist teachidx schoolidx full_teacher_index school_index current_school current_lea next_school applicant_id FOCALEmployeeID ncerdc_id ncerdc_schlcode job_id app_year ///
	commute_time commute_distance num_years_at_school max_years_at_school already_teaching choicesetday1 ///
	CloseDate PostingDate first_person_app last_person_app applied_date tlp tlp_pool tlp_job tlp_pos_type tlp_school ///
	hired offer_declined withdrew positive_assessment negative_assessment application_outcome_category has_va_ma* va_ma va_ma_preY* va_rd_preY*

* the .dta version keeps the ids; it is re-read by the other exporter stages
save "$basedata/estimationdata/`4'/`4'_choices_buff_${bufferpre}_${bufferpost}`5'", replace


foreach var in "dist" "commute_time" "commute_distance"  {

replace `var' = -999 if `var'==. // denote missing values for Matlab

}

* the .csv version carries the numeric columns only, in a fixed order
keep itidx jtidx applied interview choiceset iidx jidx dist teachidx schoolidx app_year choicesetday1 yitidx yjtidx commute_time commute_distance ///
 hired offer_declined withdrew positive_assessment negative_assessment has_va_ma_preY va_ma_preY va_rd_preY
order itidx jtidx applied interview choiceset iidx jidx dist teachidx schoolidx app_year choicesetday1 yitidx yjtidx commute_time commute_distance ///
 hired offer_declined withdrew positive_assessment negative_assessment has_va_ma_preY va_ma_preY va_rd_preY

outsheet using "$basedata/estimationdata/`4'/`4'_choices_buff_${bufferpre}_${bufferpost}`5'.csv", comma replace

}

if ${interviewsExporter} == 1 {

* create dataset for interview choice
* A job-year (jtidx) with K interviews is expanded into K choice situations (ctidx).
* In situation k the k-th interviewed applicant is the chosen one and the other interviewed
* applicants are removed; applicants who were not interviewed stay in every situation.
* A job-year without any interview yields a single situation.

use "$basedata/estimationdata/`4'/`4'_choices_buff_${bufferpre}_${bufferpost}`5'", clear
keep if applied==1
bys jtidx: egen numInterviews = sum(interview)
* number of copies per row: at least one, even when nobody was interviewed
gen numInterviews1 = numInterviews + (numInterviews==0)

* random tie-breaker that orders the interviewed applicants within a job; the seed is fixed beforehand
set seed 348399
gen randsort = runiform()

* interviewed applicants first within each job, numbered 1..K; everyone else gets 0
gsort jtidx -interview randsort
gen interview_num = 0 if interview==0
replace interview_num = 1 if jtidx!=jtidx[_n-1] & interview==1
replace interview_num = interview_num[_n-1]+1 if interview_num==. & interview==1 & jtidx==jtidx[_n-1]
assert interview_num!=.

* one copy of every applicant row per choice situation
expand numInterviews1, gen(newobs)
drop numInterviews1
sort jtidx itidx
* ctidx numbers the copies of one (job, applicant) pair, i.e. the choice situation
gen ctidx = 1 if jtidx!=jtidx[_n-1] | itidx!=itidx[_n-1]
replace ctidx = ctidx[_n-1]+1 if jtidx==jtidx[_n-1] & itidx==itidx[_n-1]
assert ctidx!=.

drop if interview_num>0 & interview_num!=ctidx // for multiple choices, remove them from model when other choices are made

* new indices
* jctidx identifies a choice situation; jcitidx counts the applicants within it
sort jtidx ctidx itidx
egen jctidx = group(jtidx ctidx)
sort jctidx itidx
gen jcitidx = 1 if jctidx!=jctidx[_n-1] | itidx!=itidx[_n-1]
replace jcitidx = jcitidx[_n-1]+1 if jctidx==jctidx[_n-1]
assert jcitidx!=.

* every choice situation contains at most one interviewed applicant
bys jctidx: egen numInterviewsCheck = sum(interview)
assert numInterviewsCheck==0 | numInterviewsCheck==1
drop numInterviewsCheck

keep jctidx jcitidx jtidx ctidx itidx interview numInterviews app_year ncerdc_schlcode applicant_id FOCALEmployeeID ncerdc_id job_id

save "$basedata/estimationdata/`4'/`4'_interviews_buff_${bufferpre}_${bufferpost}`5'", replace

* the .csv version carries the index columns and outcomes only, in a fixed order
keep jctidx jcitidx jtidx ctidx itidx interview numInterviews
order jctidx jcitidx jtidx ctidx itidx interview numInterviews

outsheet using "$basedata/estimationdata/`4'/`4'_interviews_buff_${bufferpre}_${bufferpost}`5'.csv", comma replace

}


if ${teachVarExporter} == 1 {



* create dataset with teacher characteristics

* (a) crosswalk from the ids to the row indices used in the choices file
use "$basedata/estimationdata/`4'/`4'_choices_buff_${bufferpre}_${bufferpost}`5'", clear

keep applicant_id FOCALEmployeeID ncerdc_id app_year itidx iidx
duplicates drop
sort applicant_id FOCALEmployeeID ncerdc_id app_year

tempfile tempindices
save `tempindices', replace

* (b) applicant-level characteristics
use "$basedata/FOCAL_applicant_data", clear

keep applicant_id gender ethnicity dob praxis praxisMissing grad_degree grad_degreeMissing certified certifiedMissing licensed licensedMissing
sort applicant_id

tempfile tempteacher
save `tempteacher', replace

* (c) school-year characteristics, with the m1 share replaced by its mean over all rows of the school year
use "$basedata/FOCAL_school_year_data", clear
keep ncerdc_lea ncerdc_schlcode sy titleI frac_black p_school_m1_ma p_school_m1_ma_race p_school_m1_ma_ach

foreach suff in "" "_race" "_ach" {
	bys sy: egen p_school_m1_ma`suff'_all = mean(p_school_m1_ma`suff')
}

* the cfr, homog and sM specifications reuse the baseline yearly mean
foreach suff in "_cfr" "_homog" "_sM" {
	gen p_school_m1_ma`suff'_all = p_school_m1_ma_all
}

drop p_school_m1_ma p_school_m1_ma_race p_school_m1_ma_ach
sort ncerdc_lea ncerdc_schlcode sy

tempfile tempteacherschools
save `tempteacherschools', replace

* Application window per applicant and application year (first and last application date).
* The August 31 cutoff has to act on the very data that are collapsed: the file must not be
* read again after the drop, otherwise applications after the cutoff re-enter the window.
* This matches the window construction in the basedata stage of this script.
use "$basedata/FOCAL_applications_data", clear

drop if applied_date>mdy(8,31,app_year)

* first application among school-based jobs with base_sample==1 or tlp!=0
gen applied_date_restricted = applied_date if (base_sample==1 | tlp!=0) & job_at_school==1


collapse (min) first_person_app=applied_date first_person_app_restricted=applied_date_restricted (max) last_person_app=applied_date, by(applicant_id app_year)

* round down to whole days and display as daily dates
foreach datevar of varlist first_person_app first_person_app_restricted last_person_app {
	replace `datevar' = floor(`datevar')
}
format first_person_app first_person_app_restricted last_person_app %td

tempfile tempteacherwindow
save `tempteacherwindow', replace


* Stage the three alternative value-added files as tempfiles keyed by (ncerdc_id, sy).
* Per specification: the source file and the source variables copied into the m1 and m2 columns.
local src_cfr "va_homogeneous_estimates_drift_classmeans_cfr"
local m1_cfr mu_homog_ma_cfr
local m2_cfr mu_homog_ma_cfr

local src_homog "va_homogeneous_estimates_drift"
local m1_homog mu_jt_hat_preY_ma
local m2_homog mu_jt_hat_preY_ma

local src_sM "va_estimates_drift_schoolMeans"
local m1_sM mu_jt_m1_hat_preY_ma
local m2_sM mu_jt_m2_hat_preY_ma

foreach spec in cfr homog sM {

	use "$basedata/`src_`spec''", clear

	* j and t are copied into the merge keys used further down
	gen ncerdc_id = j
	gen sy = t

	gen mu_jt_m1_hat_preY_ma_`spec' = `m1_`spec''
	gen mu_jt_m2_hat_preY_ma_`spec' = `m2_`spec''

	* average within teacher-year so the keys are unique
	collapse (mean) mu_jt_m1_hat_preY_ma_`spec' mu_jt_m2_hat_preY_ma_`spec', by(ncerdc_id sy)

	assert ncerdc_id!=.

	* defines the locals tempcfr, temphomog and tempsM
	tempfile temp`spec'
	save "`temp`spec''", replace
}

* Applicant-by-school-year panel with VA estimates, lagged pay level, transfer indicator and
* school characteristics; afterwards re-keyed from school year to application year.
use "$basedata/FOCAL_applicant_year_data", clear

* remember the incoming row order; it is restored after the three VA merges
gen obsnum = _n
sort ncerdc_id sy
merge n:1 ncerdc_id sy using `tempcfr'
drop if _m==2
drop _m

merge n:1 ncerdc_id sy using `temphomog'
drop if _m==2
drop _m

merge n:1 ncerdc_id sy using `tempsM'
drop if _m==2
drop _m

sort obsnum
drop obsnum


* numeric panel id so that time-series operators (L.) can be used below
egen applicant_id_num = group(applicant_id)
xtset applicant_id_num sy

ren index evaas_score

* first and last school year observed in the hidden district, per ncerdc_id
* (rows with a missing or zero ncerdc_id are left out of the min/max)
gen FOCAL = ncerdc_lea==XXX // hide identity
gen FOCAL_sy = sy if FOCAL==1 & ncerdc_id!=. & ncerdc_id!=0
bys ncerdc_id: egen first_yr_FOCAL = min(FOCAL_sy)
bys ncerdc_id: egen last_yr_FOCAL = max(FOCAL_sy)

* back to panel order; the [_n-1] lag below depends on it
sort applicant_id_num sy


keep applicant_id* FOCALEmployeeID ncerdc_id sy evaas_score evaas_exceeds evaas_meets evaas_not_meets ncerdc_exp ncerdc_lea ncerdc_schlcode ///
	mu_jt_m1_hat_career_ma mu_jt_m2_hat_career_ma ///
	mu_jt_m1_hat_preY_ma* mu_jt_m2_hat_preY_ma* first_yr_FOCAL last_yr_FOCAL tchr_exp_pay_level
	
* replace each listed variable by its value in the immediately preceding school year
* (missing when the previous row is another applicant or not exactly one year earlier)
foreach var in tchr_exp_pay_level {

gen `var'_lagged = `var'[_n-1] if applicant_id_num==applicant_id_num[_n-1] & sy==sy[_n-1]+1
drop `var'
ren `var'_lagged `var'

}


ren mu_jt_m1_hat_career_ma mu_jt_m1_hat_career_ma_prior
ren mu_jt_m2_hat_career_ma mu_jt_m2_hat_career_ma_prior

* transfer = district or school differs from the previous school year; only defined when a lag exists
gen transfer = (ncerdc_lea!=L.ncerdc_lea | ncerdc_schlcode!=L.ncerdc_schlcode) if applicant_id_num==L.applicant_id_num & sy==L.sy+1

* attach school-year characteristics, then drop the school identifiers
sort ncerdc_lea ncerdc_schlcode sy
merge n:1 ncerdc_lea ncerdc_schlcode sy using `tempteacherschools'
drop if _m==2
drop _m ncerdc_lea ncerdc_schlcode

sort applicant_id sy

ren titleI teach_title_I
ren frac_black teach_frac_black


* re-key to application year: the record of school year sy belongs to app_year = sy-1
ren sy app_year
replace app_year = app_year-1
sort applicant_id app_year

* one-year leads (_F1): the same values shifted one further year back, so that after merging on
* app_year they come from the following school year
preserve
gen evaas_score_F1 = evaas_score
gen teach_title_I_F1 = teach_title_I
gen teach_frac_black_F1 = teach_frac_black

replace app_year = app_year-1
keep applicant_id app_year *_F1

tempfile tempF1
save `tempF1', replace
restore



* applicant-level characteristics; every panel row must find its applicant
merge n:1 applicant_id using `tempteacher'
assert _m!=1
keep if _m==3
drop _m

* row indices from the choices file; panel rows without an index are dropped,
* index rows without a panel record are kept (their characteristics stay missing)
sort applicant_id app_year
merge 1:n applicant_id FOCALEmployeeID ncerdc_id app_year using `tempindices'
drop if _m==1
drop _m

* application window
sort applicant_id app_year
merge 1:n applicant_id app_year using `tempteacherwindow'
drop if _m==2
drop _m

* one-year leads
sort applicant_id app_year
merge n:1 applicant_id app_year using `tempF1'
drop if _m==2
drop _m

sort itidx

* demographic indicators, missing when the underlying string is empty
gen female = (gender=="F") if gender!=""
gen black = (ethnicity=="B") if ethnicity!=""
gen hispanic = (ethnicity=="H") if ethnicity!=""

* only takes effect if these columns are absent; capture suppresses the error when they already exist
cap gen p_school_m1_ma_cfr_all = p_school_m1_ma_all
cap gen p_school_m1_ma_homog_all = p_school_m1_ma_all
cap gen p_school_m1_ma_sM_all = p_school_m1_ma_all

* spread out shares that didn't fill in with merges
* (each yearly share is replaced by its mean within application year, then used to
* combine the m1 and m2 pre-year VA estimates into one measure per specification)
foreach suff2 in "" "_race" "_ach" "_cfr" "_homog" "_sM" {
ren p_school_m1_ma`suff2'_all ptemp
bys app_year: egen p_school_m1_ma`suff2'_all = mean(ptemp)
drop ptemp

gen mu_jt_preY_mean_school_ma`suff2' = mu_jt_m1_hat_preY_ma`suff2'*p_school_m1_ma`suff2'_all + mu_jt_m2_hat_preY_ma`suff2'*(1-p_school_m1_ma`suff2'_all)

}

* NOTE(review): mu_jt_m1_hat_career_ma / mu_jt_m2_hat_career_ma only exist under their _prior names
* at this point, so this line (and the recode loop below) relies on variable-name abbreviation -- confirm
gen mu_jt_mean_school_ma = mu_jt_m1_hat_career_ma*p_school_m1_ma_all + mu_jt_m2_hat_career_ma*(1-p_school_m1_ma_all)



save "$basedata/estimationdata/`4'/`4'_teachvar_buff_${bufferpre}_${bufferpost}`5'", replace


* recode missing values to -999 for the exported file
* NOTE(review): the list covers the career VA variables, which are not exported, but not the three
* preY variables kept below (mu_jt_m1_hat_preY_ma, mu_jt_m2_hat_preY_ma, mu_jt_preY_mean_school_ma),
* so those keep their missing values in the .csv -- confirm whether that is intended
foreach var in "ncerdc_exp" "evaas_score" "evaas_exceeds" "evaas_meets" "evaas_not_meets" "transfer" "teach_frac_black" "teach_title_I" "dob" "female" "black" "hispanic" "evaas_score_F1" "teach_title_I_F1" "teach_frac_black_F1" "mu_jt_m1_hat_career_ma" "mu_jt_m2_hat_career_ma" "mu_jt_mean_school_ma" "tchr_exp_pay_level" {
	replace `var' = -999 if `var'==.
}




* the .csv version carries the numeric columns only, in a fixed order
keep itidx app_year female hispanic black ncerdc_exp evaas_score evaas_exceeds evaas_meets evaas_not_meets transfer teach_frac_black teach_title_I evaas_score_F1 teach_title_I_F1 teach_frac_black_F1 mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mu_jt_preY_mean_school_ma tchr_exp_pay_level
order itidx app_year female hispanic black ncerdc_exp evaas_score evaas_exceeds evaas_meets evaas_not_meets transfer teach_frac_black teach_title_I evaas_score_F1 teach_title_I_F1 teach_frac_black_F1 mu_jt_m1_hat_preY_ma mu_jt_m2_hat_preY_ma mu_jt_preY_mean_school_ma tchr_exp_pay_level

outsheet using "$basedata/estimationdata/`4'/`4'_teachvar_buff_${bufferpre}_${bufferpost}`5'.csv", comma replace

}

if ${schoolVarExporter} == 1 {

* create dataset with school characteristics

* crosswalk from school code and application year to the column indices of the choices file
use "$basedata/estimationdata/`4'/`4'_choices_buff_${bufferpre}_${bufferpost}`5'", clear
keep ncerdc_schlcode app_year jtidx jidx
duplicates drop
sort ncerdc_schlcode app_year

tempfile tempindices
save `tempindices', replace

* school-year characteristics of the hidden district
use "$basedata/FOCAL_school_year_data", clear
keep if ncerdc_lea==XXX // hide identity
keep ncerdc_schlcode sy titleI frac_black frac_hisp school_exceeded school_met school_notmet attendance_rate tlp_school ///
	p_school_m1_ma* p_school_m2_ma* mean_n1_ma* mean_n2_ma*

* re-key to application year: the record of school year sy belongs to app_year = sy-1
ren sy app_year
replace app_year = app_year-1
sort ncerdc_schlcode app_year

* keep every index row; school-years that are not in the choices file are dropped
merge 1:n ncerdc_schlcode app_year using `tempindices'
drop if _merge==1
drop _merge

sort jtidx

save "$basedata/estimationdata/`4'/`4'_schoolvar_buff_${bufferpre}_${bufferpost}`5'", replace

* columns written to the .csv, with missing values recoded to -999
local schoolvars titleI frac_black frac_hisp school_exceeded school_met school_notmet attendance_rate tlp_school p_school_m1_ma p_school_m2_ma

foreach var of local schoolvars {
	replace `var' = -999 if `var'==.
}

keep jtidx app_year `schoolvars'
order jtidx app_year `schoolvars'

outsheet using "$basedata/estimationdata/`4'/`4'_schoolvar_buff_${bufferpre}_${bufferpost}`5'.csv", comma replace

}

end


* Run the exporter with no sample restriction ("all" matches none of the named restrictions),
* teachers as row unit and jobs as column unit; output goes to estimationdata/All_T_J with
* suffix _lf, and because the suffix is non-empty the tlp==1 rows are kept
exporter all teacher job All_T_J "_lf"

}
